This R-markdown file presents all analyses for our paper titled “Voice quality and coda /r/ in Glasgow English in the early 20th century”. The analyses are presented in the same order as they are in the paper, and the section numbering also follows our paper (which means that it actually starts at 4.1, which is the first results section in the paper).
We start by importing all relevant libraries and the data. The details of the data processing (e.g. exclusion of outliers) are shown in a separate file titled data_prep.Rmd.
Let’s import the data and relevant libraries first. Note that we have saved our statistical models as RDS files, which allows the user to save time by not having to refit them (some of our generalised additive mixed models may take between 10 minutes to an hour to fit). The user has the option of re-fitting all models by changing the value of the refit_please variable to TRUE below.
library(lme4)
## Warning: package 'lme4' was built under R version 3.5.2
## Loading required package: Matrix
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
##
## lmList
## This is mgcv 1.8-26. For overview type 'help("mgcv-package")'.
library(itsadug)
## Loading required package: plotfunctions
## Loaded package itsadug 2.3 (see 'help("itsadug")' ).
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:plotfunctions':
##
## alpha
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ purrr 0.3.2
## ✔ tidyr 0.8.2 ✔ dplyr 0.7.8
## ✔ readr 1.3.1 ✔ stringr 1.3.1
## ✔ tibble 1.4.2 ✔ forcats 0.3.0
## Warning: package 'purrr' was built under R version 3.5.2
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::alpha() masks plotfunctions::alpha()
## ✖ dplyr::collapse() masks nlme::collapse()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::src() masks Hmisc::src()
## ✖ dplyr::summarize() masks Hmisc::summarize()
library(stringr)
library(effects)
## Loading required package: carData
## Use the command
## lattice::trellis.par.set(effectsTheme())
## to customize lattice options for effects plots.
## See ?effectsTheme for details.
library(here)
## here() starts at /Users/soskuthy/Documents/Research/current/2017/jane_glasgow/glasgow-vq-r
library(afex)
## Warning: package 'afex' was built under R version 3.5.2
## ************
## Welcome to afex. For support visit: http://afex.singmann.science/
## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'KR', 'S', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - NEWS: library('emmeans') now needs to be called explicitly!
## - Get and set global package options with: afex_options()
## - Set orthogonal sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************
##
## Attaching package: 'afex'
## The following object is masked from 'package:lme4':
##
## lmer
refit_please <- F # please don't refit all the models - just load them from RDS files
# vowel data
vowels <- read_csv("data/vowels.csv")
## Parsed with column specification:
## cols(
## Speaker = col_character(),
## gender = col_character(),
## Target.stress = col_character(),
## Target.CELEX.phonemes = col_character(),
## Target.orthography = col_character(),
## Match.segments = col_character(),
## Target.segments = col_character(),
## Target.segments.start = col_double(),
## Target.segments.end = col_double(),
## time_0.5 = col_double(),
## vowel = col_character(),
## preceding = col_character(),
## following = col_character(),
## decade = col_character(),
## decade.fact = col_character(),
## duration = col_double(),
## f1 = col_double(),
## f2 = col_double(),
## f3 = col_double()
## )
# formatting for mixed effects models
vowels$Speaker <- as.factor(vowels$Speaker)
vowels$Target.orthography <- as.factor(vowels$Target.orthography)
vowels$vowel <- as.factor(vowels$vowel)
vowels$decade <- ifelse(vowels$decade=="00", 2000, as.numeric(vowels$decade) + 1900)
vowels$decade.fact <- factor(vowels$decade.fact, levels=c("70","80","90","00"))
# /r/ data
R <- read_csv("data/R.csv") # F/M confuse read_csv...
## Parsed with column specification:
## cols(
## .default = col_double(),
## speaker = col_character(),
## traj = col_character(),
## gender = col_logical(),
## word = col_character(),
## preceding = col_character(),
## stress = col_character(),
## following = col_character(),
## foll.broad.f2 = col_character(),
## foll.broad.f3 = col_character(),
## position = col_character(),
## trans = col_character(),
## exclude = col_logical(),
## comment = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 4554 parsing failures.
## row col expected actual file
## 1090 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
## 1091 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
## 1092 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
## 1093 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
## 1094 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
## .... ...... .................. ...... ............
## See problems(...) for more details.
coltyps <- spec(R)
coltyps$cols$gender <- col_character()
# and again!
R <- read_csv("data/R.csv", col_types=coltyps)
## Warning: 132 parsing failures.
## row col expected actual file
## 1233 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
## 1234 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
## 1235 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
## 1236 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
## 1237 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
## .... ....... .................. ...... ............
## See problems(...) for more details.
# some changes to variables to make them work with GAMMs
R <- R %>%
mutate(stress = as.ordered(stress),
start.event = (measurement.no==0),
foll.broad.f3 = as.factor(foll.broad.f3),
speaker = as.factor(speaker),
word = as.factor(word))
contrasts(R$stress) <- "contr.treatment"
# separate data sets for males and females
R.m <- R %>%
filter(gender=="M")
R.f <- R %>%
filter(gender=="F")
# setting up variables for analysis via GAMMs
R$trans.broad <- recode(R$trans, wa="a")
R$trans.broad.fact <- as.ordered(R$trans.broad)
contrasts(R$trans.broad.fact) <- "contr.treatment"
R$gender.ordered <- as.ordered(R$gender)
contrasts(R$gender.ordered) <- "contr.treatment"
# auditory R data -- create data set with unique realisations
R.aud <- R[!(R$trans %in% c("", "?r")) & R$measurement.no==0,]
# only a few weakened approximants, so fold these into other categories
R.aud <- R %>%
filter(!(trans %in% c("", "?r")),
!is.na(trans),
measurement.no==0) %>%
mutate(trans.broad=recode(trans, wa="a"),
gender=recode(gender, M="male", `F`="female"))
# auditory voice quality data
# getting f3 avgs
avg.f3 <- unique(select(R, speaker, avg.f3))
# reading & formatting data
R.vq <- read_csv("data/auditoryvq.csv") %>%
rename(speaker="speaker code",
advanced.tip.blade="advanced tip/blade",
tongue.body.height="tongue body height",
tongue.body.front.backness="tongue body front-backness") %>%
# join with avg.f3!
inner_join(avg.f3, by="speaker") %>%
mutate(gender=as.factor(ifelse(!is.na(str_match(speaker, "-m0")), "male", "female")),
decade=unlist(substr(speaker, 1, 2)),
decade=as.numeric(recode(decade, `00`="100")) + 1900)
## Parsed with column specification:
## cols(
## `speaker number` = col_double(),
## `speaker code` = col_character(),
## `advanced tip/blade` = col_double(),
## `tongue body height` = col_double(),
## `tongue body front-backness` = col_double(),
## RTR = col_double(),
## nasalization = col_double(),
## whispery = col_double(),
## `sounds?` = col_character()
## )
## Warning: Column `speaker` joining character vector and factor, coercing
## into character vector
Analysing males first, not controlling for baseline F3. Fitting three models:
if (refit_please) {
# model without AR
system.time(
R.m.mod.no.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m)
)
# saveRDS(R.m.mod.no.AR, "models/R.m.mod.no.AR")
# extracting empirical value of autocorrelation at lag 1
R.m.rho <- start_value_rho(R.m.mod.no.AR)
# full model with AR
system.time(
R.m.mod.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR, "models/R.m.mod.AR")
summary(R.m.mod.AR)
# nested model with AR
system.time(
R.m.mod.AR.min.decade <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade, "models/R.m.mod.AR.min.decade")
}
R.m.mod.AR <- readRDS("models/R.m.mod.AR")
R.m.mod.AR.min.decade <- readRDS("models/R.m.mod.AR.min.decade")
compareML(R.m.mod.AR, R.m.mod.AR.min.decade) # significant overall effect of decade
## R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.m.mod.AR.min.decade: f3 ~ stress + s(measurement.no) + s(duration) + s(preceding.front,
## k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.m.mod.AR.min.decade 25015.80 22
## 2 R.m.mod.AR 25006.28 32 9.519 10.000 0.040 *
##
## AIC difference: -0.11, model R.m.mod.AR has lower AIC.
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade): AIC might not be
## reliable, as an AR1 model is included (rho1 = 0.848517, rho2 = 0.848517).
Further questions (only first two of these discussed in paper):
if (refit_please) {
# overall stress?
system.time(
R.m.mod.AR.min.stress <- bam(f3 ~ #stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
#s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.stress, "models/R.m.mod.AR.min.stress")
# shape change?
system.time(
R.m.mod.AR.min.decade.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade.shape, "models/R.m.mod.AR.min.decade.shape")
# overall stress x decade?
system.time(
R.m.mod.AR.min.decade.stress <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade.stress, "models/R.m.mod.AR.min.decade.stress")
# shape stress x decade?
system.time(
R.m.mod.AR.min.decade.stress.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade.stress.shape, "models/R.m.mod.AR.min.decade.stress.shape")
}
R.m.mod.AR.min.stress <- readRDS("models/R.m.mod.AR.min.stress")
R.m.mod.AR.min.decade.shape <- readRDS("models/R.m.mod.AR.min.decade.shape")
R.m.mod.AR.min.decade.stress <- readRDS("models/R.m.mod.AR.min.decade.stress")
R.m.mod.AR.min.decade.stress.shape <- readRDS("models/R.m.mod.AR.min.decade.stress.shape")
# overall stress?
compareML(R.m.mod.AR, R.m.mod.AR.min.stress, suggest.report=T) # non-sig
## R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.m.mod.AR.min.stress: f3 ~ s(measurement.no) + s(decade, k = 4) + s(duration) + s(preceding.front,
## k = 3) + ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model R.m.mod.AR is [not significantly?] better than model R.m.mod.AR.min.stress (X2(8.00)=6.385, p>.1).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.m.mod.AR.min.stress 25012.67 24
## 2 R.m.mod.AR 25006.28 32 6.385 8.000 0.120
##
## AIC difference: -8.00, model R.m.mod.AR has lower AIC.
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.stress, suggest.report
## = T): AIC might not be reliable, as an AR1 model is included (rho1 =
## 0.848517, rho2 = 0.848517).
# shape change?
compareML(R.m.mod.AR, R.m.mod.AR.min.decade.shape, suggest.report=T) # non-sig
## R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.m.mod.AR.min.decade.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, duration) +
## ti(measurement.no, preceding.front, k = c(10, 3)) + s(measurement.no,
## foll.broad.f3, bs = "fs", m = 1, k = 4) + s(measurement.no,
## speaker, bs = "fs", m = 1, k = 4) + s(measurement.no, word,
## bs = "fs", m = 1, k = 4)
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model R.m.mod.AR is [not significantly?] better than model R.m.mod.AR.min.decade.shape (X2(6.00)=3.128, p>.1).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.m.mod.AR.min.decade.shape 25009.41 26
## 2 R.m.mod.AR 25006.28 32 3.128 6.000 0.395
##
## AIC difference: 2.06, model R.m.mod.AR.min.decade.shape has lower AIC.
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade.shape,
## suggest.report = T): AIC might not be reliable, as an AR1 model is included
## (rho1 = 0.848517, rho2 = 0.848517).
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade.shape, suggest.report = T): Only small difference in ML...
# overall stress x decade?
compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress, suggest.report=T) # non-sig
## R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.m.mod.AR.min.decade.stress: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model R.m.mod.AR is [not significantly?] better than model R.m.mod.AR.min.decade.stress (X2(5.00)=4.364, p>.1).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.m.mod.AR.min.decade.stress 25010.65 27
## 2 R.m.mod.AR 25006.28 32 4.364 5.000 0.120
##
## AIC difference: -8.76, model R.m.mod.AR has lower AIC.
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress,
## suggest.report = T): AIC might not be reliable, as an AR1 model is included
## (rho1 = 0.848517, rho2 = 0.848517).
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress, suggest.report = T): Only small difference in ML...
# shape stress x decade?
compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress.shape) # non-sig
## R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.m.mod.AR.min.decade.stress.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value
## 1 R.m.mod.AR.min.decade.stress.shape 25009.06 29
## 2 R.m.mod.AR 25006.28 32 2.778 3.000 0.135
## Sig.
## 1
## 2
##
## AIC difference: -5.97, model R.m.mod.AR has lower AIC.
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress.shape): AIC
## might not be reliable, as an AR1 model is included (rho1 = 0.848517, rho2 =
## 0.848517).
## Warning in compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress.shape): Only small difference in ML...
Creating figure 2 bottom panel showing the model predictions for males.
# extracting model predictions (using plot_smooth from itsadug)
R.m.full.plot <- plot_smooth(R.m.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.m.mod.AR, view = "measurement.no", plot_all =
## "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): full.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.122709575.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.m.schwa.plot <- plot_smooth(R.m.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.m.mod.AR, view = "measurement.no", plot_all =
## "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): schwa.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.122709575.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.m.plot <- rbind(R.m.full.plot, R.m.schwa.plot)
# formatting decade predictor
R.m.plot$decade <- factor(R.m.plot$decade, levels=c("1970","1980","1990","2000"))
# creating the plot
ggplot(R.m.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(values=c("orange","darkorange3","deepskyblue1","deepskyblue4"),
name="decade of\nrecording") +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in males (not controlling for baseline F3)")
## Warning: Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_male_f3_change_no_control.pdf", width=8, height=4)
Analysing females, not controlling for baseline F3. Fitting three models:
if (refit_please) {
# model without AR
system.time(
R.f.mod.no.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f)
)
# saveRDS(R.f.mod.no.AR, "models/R.f.mod.no.AR")
# extracting empirical value of autocorrelation at lag 1
R.f.rho <- start_value_rho(R.f.mod.no.AR)
# full model with AR
system.time(
R.f.mod.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR, "models/R.f.mod.AR")
summary(R.f.mod.AR)
# nested model with AR
system.time(
R.f.mod.AR.min.decade <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade, "models/R.f.mod.AR.min.decade")
}
R.f.mod.AR <- readRDS("models/R.f.mod.AR")
R.f.mod.AR.min.decade <- readRDS("models/R.f.mod.AR.min.decade")
compareML(R.f.mod.AR, R.f.mod.AR.min.decade) # significant overall effect of decade
## R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.f.mod.AR.min.decade: f3 ~ stress + s(measurement.no) + s(duration) + s(preceding.front,
## k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.f.mod.AR.min.decade 25888.44 22
## 2 R.f.mod.AR 25875.82 32 12.623 10.000 0.005 **
##
## AIC difference: -7.77, model R.f.mod.AR has lower AIC.
## Warning in compareML(R.f.mod.AR, R.f.mod.AR.min.decade): AIC might not be
## reliable, as an AR1 model is included (rho1 = 0.850619, rho2 = 0.850619).
Further questions (only first two of these discussed in paper):
if (refit_please) {
# overall stress?
system.time(
R.f.mod.AR.min.stress <- bam(f3 ~ #stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
#s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.stress, "models/R.f.mod.AR.min.stress")
# shape change?
system.time(
R.f.mod.AR.min.decade.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade.shape, "models/R.f.mod.AR.min.decade.shape")
# overall stress x decade?
system.time(
R.f.mod.AR.min.decade.stress <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade.stress, "models/R.f.mod.AR.min.decade.stress")
# shape stress x decade?
system.time(
R.f.mod.AR.min.decade.stress.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade.stress.shape, "models/R.f.mod.AR.min.decade.stress.shape")
}
R.f.mod.AR.min.stress <- readRDS("models/R.f.mod.AR.min.stress")
R.f.mod.AR.min.decade.shape <- readRDS("models/R.f.mod.AR.min.decade.shape")
R.f.mod.AR.min.decade.stress <- readRDS("models/R.f.mod.AR.min.decade.stress")
R.f.mod.AR.min.decade.stress.shape <- readRDS("models/R.f.mod.AR.min.decade.stress.shape")
# overall stress?
compareML(R.f.mod.AR, R.f.mod.AR.min.stress, suggest.report=T) # sig
## R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.f.mod.AR.min.stress: f3 ~ s(measurement.no) + s(decade, k = 4) + s(duration) + s(preceding.front,
## k = 3) + ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model R.f.mod.AR is [marginally / significantly?] better than model R.f.mod.AR.min.stress (X2(8.00)=9.201, p0.018).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.f.mod.AR.min.stress 25885.02 24
## 2 R.f.mod.AR 25875.82 32 9.201 8.000 0.018 *
##
## AIC difference: -2.30, model R.f.mod.AR has lower AIC.
## Warning in compareML(R.f.mod.AR, R.f.mod.AR.min.stress, suggest.report
## = T): AIC might not be reliable, as an AR1 model is included (rho1 =
## 0.850619, rho2 = 0.850619).
# shape change?
compareML(R.f.mod.AR, R.f.mod.AR.min.decade.shape, suggest.report=T) # sig
## R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.f.mod.AR.min.decade.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, duration) +
## ti(measurement.no, preceding.front, k = c(10, 3)) + s(measurement.no,
## foll.broad.f3, bs = "fs", m = 1, k = 4) + s(measurement.no,
## speaker, bs = "fs", m = 1, k = 4) + s(measurement.no, word,
## bs = "fs", m = 1, k = 4)
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model R.f.mod.AR is [marginally / significantly?] better than model R.f.mod.AR.min.decade.shape (X2(6.00)=8.210, p0.012).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.f.mod.AR.min.decade.shape 25884.03 26
## 2 R.f.mod.AR 25875.82 32 8.210 6.000 0.012 *
##
## AIC difference: -9.33, model R.f.mod.AR has lower AIC.
## Warning in compareML(R.f.mod.AR, R.f.mod.AR.min.decade.shape,
## suggest.report = T): AIC might not be reliable, as an AR1 model is included
## (rho1 = 0.850619, rho2 = 0.850619).
# overall stress x decade?
compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress, suggest.report=T) # sig
## R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.f.mod.AR.min.decade.stress: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model R.f.mod.AR is [marginally / significantly?] better than model R.f.mod.AR.min.decade.stress (X2(5.00)=6.830, p0.018).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.f.mod.AR.min.decade.stress 25882.65 27
## 2 R.f.mod.AR 25875.82 32 6.830 5.000 0.018 *
##
## AIC difference: -4.35, model R.f.mod.AR has lower AIC.
## Warning in compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress,
## suggest.report = T): AIC might not be reliable, as an AR1 model is included
## (rho1 = 0.850619, rho2 = 0.850619).
# shape stress x decade?
compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress.shape) # sig
## R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.f.mod.AR.min.decade.stress.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
## s(preceding.front, k = 3) + s(measurement.no, by = stress) +
## s(decade, k = 4, by = stress) + ti(measurement.no, decade,
## k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value
## 1 R.f.mod.AR.min.decade.stress.shape 25880.07 29
## 2 R.f.mod.AR 25875.82 32 4.255 3.000 0.037
## Sig.
## 1
## 2 *
##
## AIC difference: -5.03, model R.f.mod.AR has lower AIC.
## Warning in compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress.shape): AIC
## might not be reliable, as an AR1 model is included (rho1 = 0.850619, rho2 =
## 0.850619).
## Warning in compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress.shape): Only small difference in ML...
Creating figure 2 top panel showing the model predictions for females.
# extracting model predictions (using plot_smooth from itsadug)
R.f.full.plot <- plot_smooth(R.f.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.f.mod.AR, view = "measurement.no", plot_all =
## "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): full.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.103923377.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.f.schwa.plot <- plot_smooth(R.f.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.f.mod.AR, view = "measurement.no", plot_all =
## "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): schwa.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.103923377.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.f.plot <- rbind(R.f.full.plot, R.f.schwa.plot)
# formatting decade predictor
R.f.plot$decade <- factor(R.f.plot$decade, levels=c("1970","1980","1990","2000"))
# creating plot
ggplot(R.f.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(name="decade of\nrecording", values=c("orange","darkorange3","deepskyblue1","deepskyblue4")) +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in females (not controlling for baseline F3)")
## Warning: Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_female_f3_change_no_control.pdf", width=8, height=4)
First, we fit our model of dynamic F3 measurements as a function of /r/-realisation, whose predictions are plotted in figure 3.
if (refit_please) {
# model without AR
system.time(
R.shapes.mod.no.AR <- bam(f3 ~ stress + trans.broad.fact + gender +
# level 1 smooth terms
s(measurement.no) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(measurement.no, by=trans.broad.fact) +
s(measurement.no, by=gender) +
# level 2: 2d smooths
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R)
)
#saveRDS(R.shapes.mod.no.AR, "models/R.shapes.mod.no.AR")
# extracting empirical autocorrelation value at lag 1
R.shapes.rho <- start_value_rho(R.shapes.mod.no.AR)
# setting up start.event for use with AR1
R$start.event <- R$measurement.no == 0
# full model with AR
system.time(
R.shapes.mod.AR <- bam(f3 ~ stress + trans.broad.fact + gender +
# level 1 smooth terms
s(measurement.no) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(measurement.no, by=trans.broad.fact) +
s(measurement.no, by=gender) +
# level 2: 2d smooths
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R,
AR.start=R$start.event, rho=R.shapes.rho,
method="ML")
)
#saveRDS(R.shapes.mod.AR, "models/R.shapes.mod.AR")
summary(R.shapes.mod.AR)
# nested model
system.time(
R.shapes.mod.AR.min.trans <- bam(f3 ~ stress + gender + # trans.broad.fact +
# level 1 smooth terms
s(measurement.no) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(measurement.no, by=trans.broad.fact) +
s(measurement.no, by=gender) +
# level 2: 2d smooths
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R,
AR.start=R$start.event, rho=R.shapes.rho,
method="ML")
)
#saveRDS(R.shapes.mod.AR.min.trans, "models/R.shapes.mod.AR.min.trans")
}
R.shapes.mod.AR <- readRDS("models/R.shapes.mod.AR")
R.shapes.mod.AR.min.trans <- readRDS("models/R.shapes.mod.AR.min.trans")
compareML(R.shapes.mod.AR, R.shapes.mod.AR.min.trans)
## R.shapes.mod.AR: f3 ~ stress + trans.broad.fact + gender + s(measurement.no) +
## s(duration) + s(preceding.front, k = 3) + s(measurement.no,
## by = stress) + s(measurement.no, by = trans.broad.fact) +
## s(measurement.no, by = gender) + ti(measurement.no, duration) +
## ti(measurement.no, preceding.front, k = c(10, 3)) + s(measurement.no,
## foll.broad.f3, bs = "fs", m = 1, k = 4) + s(measurement.no,
## speaker, bs = "fs", m = 1, k = 4) + s(measurement.no, word,
## bs = "fs", m = 1, k = 4)
##
## R.shapes.mod.AR.min.trans: f3 ~ stress + gender + s(measurement.no) + s(duration) + s(preceding.front,
## k = 3) + s(measurement.no, by = stress) + s(measurement.no,
## by = gender) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 R.shapes.mod.AR.min.trans 50868.40 25
## 2 R.shapes.mod.AR 50752.83 40 115.567 15.000 < 2e-16 ***
##
## AIC difference: -192.66, model R.shapes.mod.AR has lower AIC.
## Warning in compareML(R.shapes.mod.AR, R.shapes.mod.AR.min.trans): AIC
## might not be reliable, as an AR1 model is included (rho1 = 0.860217, rho2 =
## 0.860217).
Now creating figure 3.
# extracting model predictions
R.shapes <- plot_smooth(R.shapes.mod.AR, view="measurement.no", plot_all="trans.broad.fact",
cond=list(gender="F"), rm.ranef=T, rug=F)[[1]]
## Summary:
## * stress : factor; set to the value(s): schwa.
## * trans.broad.fact : factor; set to the value(s): a, i, o, r, t, wt.
## * gender : factor; set to the value(s): F.
## * measurement.no : numeric predictor; with 30 values ranging from 0.000000 to 10.000000.
## * duration : numeric predictor; set to the value(s): 0.11452771.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
# releveling, meaningful names for realisations
R.shapes$trans.broad.fact <- factor(R.shapes$trans.broad.fact, levels=c("o","i","wt","a","t","r"))
R.shapes <- R.shapes %>%
mutate(realisation=recode(trans.broad.fact,
o="zero",
i="intermediate",
wt="weakened tap",
a="approximant",
t="full tap",
r="trill"),
realisation=factor(realisation,
levels=c("zero",
"intermediate",
"weakened tap",
"approximant",
"full tap",
"trill"))
)
# excluding trills
R.shapes <- filter(R.shapes, trans.broad.fact != "r")
# creating graph
ggplot(R.shapes, aes(x=measurement.no, y=fit, col=realisation, lty=realisation)) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=trans.broad.fact), alpha=0.05, colour=NA) +
geom_line(lwd=1) +
scale_linetype_manual(values = c(3,4,2,
5,1),
name="/r/ realisation") +
scale_colour_manual(values = c("orange","darkorange1",
"darkorange3","deepskyblue1",
"deepskyblue3"),
name="/r/ realisation") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2450,2950)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("Acoustic correlates of different /r/ realisations in F3")
#ggsave("graphs/r_acoustic_correlates.pdf", width=6, height=4)
We now create figure 4, that is, the proportions of different realisations as a function of time.
# calculating proportions
R.props.dec <- R.aud %>%
count(decade, gender, trans.broad) %>%
group_by(decade, gender) %>%
mutate(prop = n/sum(n)) %>%
ungroup()
# releveling
R.props.dec$trans.broad <- factor(R.props.dec$trans.broad, levels=c("o","i","wt","a","t","r"))
# creating the graph
ggplot(R.props.dec, aes(x=decade, fill=trans.broad)) +
geom_bar(aes(y=prop), stat="identity", position="stack") +
facet_wrap(~gender) +
scale_fill_manual(values = c("orange","darkorange1",
"darkorange3","deepskyblue1",
"deepskyblue3","deepskyblue4"),
name="/r/ realisation",
labels=c("zero","intermediate",
"weakened tap","approximant",
"tap","trill")) +
scale_y_continuous(labels = scales::percent, name="percentage of realisations") +
scale_x_continuous(name = "decade of recording") +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("Changes in proportions of auditory /r/ variants")
#ggsave("graphs/r_auditory.pdf", width=8, height=4)
We now run the logistic regression models whose results are reported in section 4.2 of the paper.
# creating a strength of /r/ variable and a presence of /r/ variable (both binary)
R.aud <- mutate(R.aud, r.strength=recode(trans.broad,
t="strong",
a="strong",
r="strong",
wt="weak",
i="weak",
o="weak"),
r.presence=recode(trans.broad,
t="present",
a="present",
r="present",
wt="present",
i="deleted",
o="deleted")
)
R.aud$r.strength <- factor(R.aud$r.strength, levels=c("weak","strong"))
R.aud$r.presence <- factor(R.aud$r.presence, levels=c("deleted","present"))
if (refit_please) {
# note the absence of random slopes over decade by word (not possible to include due to non-convergence)
# full model for strength
r.str.mod <- glmer(r.strength ~ scale(decade) * gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial")
#saveRDS(r.str.mod, "models/r.str.mod")
# nested model for strength
r.str.mod.min.decade <- glmer(r.strength ~ gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial")
#saveRDS(r.str.mod.min.decade, "models/r.str.mod.min.decade")
# note the absence of random slopes over decade by word (not possible to include due to non-convergence)
# full model for /r/ presence
r.pres.mod <- glmer(r.presence ~ scale(decade) * gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial",
control=glmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)))
#saveRDS(r.pres.mod, "models/r.pres.mod")
# nested model for /r/ presence
r.pres.mod.min.decade <- glmer(r.presence ~ gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial")
#saveRDS(r.pres.mod.min.decade, "models/r.pres.mod.min.decade")
}
r.str.mod <- readRDS("models/r.str.mod")
r.str.mod.min.decade <- readRDS("models/r.str.mod.min.decade")
r.pres.mod <- readRDS("models/r.pres.mod")
r.pres.mod.min.decade <- readRDS("models/r.pres.mod.min.decade")
summary(r.str.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: r.strength ~ scale(decade) * gender + stress + (1 | speaker) +
## (1 | word)
## Data: R.aud
##
## AIC BIC logLik deviance df.resid
## 1031.4 1064.2 -508.7 1017.4 799
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6444 -0.7391 -0.5031 0.9313 3.7346
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.4511 0.6717
## speaker (Intercept) 0.3346 0.5784
## Number of obs: 806, groups: word, 138; speaker, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.456095 0.254805 -1.790 0.0735 .
## scale(decade) -0.275813 0.206426 -1.336 0.1815
## gendermale 0.028435 0.287716 0.099 0.9213
## stressschwa -0.004962 0.185419 -0.027 0.9786
## scale(decade):gendermale 0.051526 0.288656 0.179 0.8583
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(d) gndrml strsss
## scale(decd) 0.031
## gendermale -0.590 -0.029
## stressschwa -0.469 0.001 0.023
## scl(dcd):gn -0.014 -0.714 0.028 -0.027
summary(r.pres.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: r.presence ~ scale(decade) * gender + stress + (1 | speaker) +
## (1 | word)
## Data: R.aud
## Control:
## glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 857.4 890.2 -421.7 843.4 799
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7662 -0.6148 0.3946 0.5393 1.7176
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.6589 0.8117
## speaker (Intercept) 0.3310 0.5753
## Number of obs: 806, groups: word, 138; speaker, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.52029 0.29174 5.211 1.88e-07 ***
## scale(decade) -0.30539 0.21109 -1.447 0.1480
## gendermale 0.41775 0.30091 1.388 0.1650
## stressschwa -0.47407 0.21591 -2.196 0.0281 *
## scale(decade):gendermale 0.03613 0.29750 0.121 0.9033
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(d) gndrml strsss
## scale(decd) -0.026
## gendermale -0.502 0.025
## stressschwa -0.510 0.026 -0.023
## scl(dcd):gn 0.027 -0.712 -0.034 -0.045
# model comparisons reported in paper
anova(r.str.mod, r.str.mod.min.decade,test="Chisq")
anova(r.pres.mod, r.pres.mod.min.decade,test="Chisq")
First some plots showing the raw data for F1/F2/F3 broken down by vowels (these are especially useful as they show that all vowels show a rise in F39.
ggplot(vowels, aes(x=decade.fact, y=f1)) +
facet_grid(gender ~ vowel) +
geom_boxplot()
ggplot(vowels, aes(x=decade.fact, y=f2)) +
facet_grid(gender ~ vowel) +
geom_boxplot()
ggplot(vowels, aes(x=decade.fact, y=f3)) +
facet_grid(gender ~ vowel) +
geom_boxplot()
We’ll fit linear mixed models to F1/F2/F3, running the models in separate R-markdown chunks.
First F1:
# this is the model that we would like to have - but it
# does not converge due to a singularity involving
# the decade by vowel random slope
# f1.mod.full <- lmer(f1 ~ gender * decade.fact +
# scale(duration) +
# (1 + decade.fact | vowel) +
# (1 | Target.orthography) +
# (1 | Speaker),
# data=vowels, REML=F)
# these are two alternative models; model 1 forces the decade
# effect to be linear
if (refit_please) {
f1.mod.full.1 <- lmer(f1 ~ gender * scale(decade) +
scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.full.1, "models/f1.mod.full.1")
# model 2 has no by-vowel random slope for decade (but the decade
# effect is allowed to be non-linear)
f1.mod.full.2 <- lmer(f1 ~ gender * decade.fact +
scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.full.2, "models/f1.mod.full.2")
# neither model shows a significant effect of decade:
# (and note that model 2 is expected to be anti-conservative with
# respect to decade!)
f1.mod.comp.1 <- lmer(f1 ~ gender + scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.comp.1, "models/f1.mod.comp.1")
f1.mod.comp.2 <- lmer(f1 ~ gender + scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.comp.2, "models/f1.mod.comp.2")
}
f1.mod.full.1 <- readRDS("models/f1.mod.full.1")
f1.mod.full.2 <- readRDS("models/f1.mod.full.2")
f1.mod.comp.1 <- readRDS("models/f1.mod.comp.1")
f1.mod.comp.2 <- readRDS("models/f1.mod.comp.2")
anova(f1.mod.full.1, f1.mod.comp.1) # non-sig
anova(f1.mod.full.2, f1.mod.comp.2) # non-sig
F1 does not show a significant change.
# this is the model that we would like to have - but it
# does not converge due to a singularity involving
# the decade by vowel random slope
# f2.mod.full <- lmer(f2 ~ gender * decade.fact +
# scale(duration) +
# (1 + decade.fact | vowel) +
# (1 | Target.orthography) +
# (1 | Speaker),
# data=vowels, REML=F)
# these are two alternative models; model 1 forces the decade
# effect to be linear
if (refit_please) {
f2.mod.full.1 <- lmer(f2 ~ gender * scale(decade) +
scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.full.1, "models/f2.mod.full.1")
# model 2 has no by-vowel random slope for decade (but the decade
# effect is allowed to be non-linear)
f2.mod.full.2 <- lmer(f2 ~ gender * decade.fact +
scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.full.2, "models/f2.mod.full.2")
# neither model shows a significant effect of decade:
# (and note that model 2 is expected to be anti-conservative with
# respect to decade!)
f2.mod.comp.1 <- lmer(f2 ~ gender + scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.comp.1, "models/f2.mod.comp.1")
f2.mod.comp.2 <- lmer(f2 ~ gender + scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.comp.2, "models/f2.mod.comp.2")
}
f2.mod.full.1 <- readRDS("models/f2.mod.full.1")
f2.mod.comp.1 <- readRDS("models/f2.mod.comp.1")
f2.mod.full.2 <- readRDS("models/f2.mod.full.2")
f2.mod.comp.2 <- readRDS("models/f2.mod.comp.2")
anova(f2.mod.full.1, f2.mod.comp.1) # non-sig
anova(f2.mod.full.2, f2.mod.comp.2) # non-sig
F2 does not show a significant change.
Now for F3:
if (refit_please) {
f3.mod.full <- lmer(f3 ~ gender * decade.fact +
scale(duration) +
(1 + decade.fact | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f3.mod.full, "models/f3.mod.full")
#summary(f3.mod.full)
f3.mod.comp <- lmer(f3 ~ gender + scale(duration) +
(1 + decade.fact | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f3.mod.comp, "models/f3.mod.comp")
}
f3.mod.full <- readRDS("models/f3.mod.full")
f3.mod.comp <- readRDS("models/f3.mod.comp")
anova(f3.mod.full, f3.mod.comp)
F3 clearly rises significantly over time. Prediction plot for the paper, i.e. figure 5:
# extracting predictions from the model
new.dat <- expand.grid(gender=unique(vowels$gender),
decade.fact=levels(vowels$decade.fact),
duration=median(vowels$duration),
vowel=unique(vowels$vowel)[1])
new.dat$f3 <- 0
new.dat$f3 <- predict(f3.mod.full, new.dat, re.form=NA)
mm <- model.matrix(terms(f3.mod.full),new.dat) # for confidence intervals
pvar1 <- diag(mm %*% tcrossprod(vcov(f3.mod.full),mm))
new.dat <- data.frame(
new.dat,
lower = new.dat$f3-2*sqrt(pvar1),
upper = new.dat$f3+2*sqrt(pvar1)
)
# creating the graph
mean_fun <- function(x){
return(data.frame(y = mean(x), label = paste0(" ", round(mean(x)), " Hz")))
}
ggplot(new.dat, aes(x=decade.fact, y=f3, col=gender)) +
geom_violin(data=vowels, aes(fill=gender, col=NA), alpha=0.2, show.legend=F) +
geom_point(size=2, position=position_dodge(width=0.9)) +
geom_errorbar(aes(ymin=lower, ymax=upper), width=0.25, position=position_dodge(width=0.9)) +
stat_summary(fun.data = mean_fun, geom = "text", size=4, position=position_dodge(0.9), hjust=0, show.legend=F) +
scale_color_manual(name="gender",breaks=c("F","M"), labels=c("female", "male"),
values=c("deepskyblue4","orange")) +
scale_fill_manual(name="gender",breaks=c("F","M"), labels=c("female", "male"),
values=c("deepskyblue4","orange"), guide=F) +
scale_x_discrete(name="decade of recording", labels=c(1970,1980,1990,2000), expand=expand_scale(add=c(0.05,1))) +
scale_y_continuous(name="F3") +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank()) +
ggtitle("F3 changes for vowels (with model predictions)")
#ggsave("graphs/vowels_f3_change.pdf", width=8, height=4)
We first fit a model that shows how F3 correlates with different auditory measures
vq.f3.model <- lm(avg.f3 ~ advanced.tip.blade +
tongue.body.height +
tongue.body.front.backness +
gender, data=R.vq)
summary(vq.f3.model)
##
## Call:
## lm(formula = avg.f3 ~ advanced.tip.blade + tongue.body.height +
## tongue.body.front.backness + gender, data = R.vq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -232.20 -110.79 12.56 90.08 253.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2764.70 125.14 22.093 1.72e-14 ***
## advanced.tip.blade -77.74 62.35 -1.247 0.228461
## tongue.body.height 66.50 26.90 2.472 0.023643 *
## tongue.body.front.backness -92.19 52.15 -1.768 0.094008 .
## gendermale -346.30 77.08 -4.493 0.000281 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 165.6 on 18 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6679, Adjusted R-squared: 0.5942
## F-statistic: 9.052 on 4 and 18 DF, p-value: 0.0003441
We then fit three separate models (with decade centred and gender sum coded) to check for decade / gender effects in our three auditory measures.
# centring decade
R.vq$decade.centred <- scale(R.vq$decade, scale=F)
# sum coding gender
R.vq$gender.sum <- R.vq$gender
contrasts(R.vq$gender.sum) <- contr.sum(2)
# fitting and summarising models
summary(lm(tongue.body.height ~ decade.centred + gender.sum, data=R.vq))
##
## Call:
## lm(formula = tongue.body.height ~ decade.centred + gender.sum,
## data = R.vq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1958 -0.8708 -0.4042 0.8042 2.8458
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.93750 0.27147 -3.453 0.00238 **
## decade.centred 0.04083 0.02428 1.682 0.10744
## gender.sum1 0.52083 0.27147 1.919 0.06874 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.33 on 21 degrees of freedom
## Multiple R-squared: 0.2366, Adjusted R-squared: 0.1639
## F-statistic: 3.254 on 2 and 21 DF, p-value: 0.05873
summary(lm(tongue.body.front.backness ~ decade.centred + gender.sum, data=R.vq))
##
## Call:
## lm(formula = tongue.body.front.backness ~ decade.centred + gender.sum,
## data = R.vq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72083 -0.55000 -0.04167 0.37083 2.27917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.687500 0.150355 -11.223 2.48e-10 ***
## decade.centred -0.009167 0.013448 -0.682 0.503
## gender.sum1 0.270833 0.150355 1.801 0.086 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7366 on 21 degrees of freedom
## Multiple R-squared: 0.1501, Adjusted R-squared: 0.06918
## F-statistic: 1.855 on 2 and 21 DF, p-value: 0.1812
summary(lm(advanced.tip.blade ~ decade.centred + gender.sum, data=R.vq))
##
## Call:
## lm(formula = advanced.tip.blade ~ decade.centred + gender.sum,
## data = R.vq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.75535 -0.54625 -0.00076 0.40826 0.99924
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.58716 0.12417 12.783 4.42e-11 ***
## decade.centred -0.00818 0.01091 -0.750 0.462
## gender.sum1 0.04549 0.12417 0.366 0.718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5948 on 20 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.03418, Adjusted R-squared: -0.0624
## F-statistic: 0.3539 on 2 and 20 DF, p-value: 0.7063
Create figure 6.
# getting counts of different tongue body height values per decade
R.vq.count <- dplyr::count(R.vq, tongue.body.height, decade)
# arranging by decade / tongue body height
R.vq.ordered <- R.vq %>%
arrange(decade, tongue.body.height)
# creating graph
ggplot(R.vq.ordered, aes(x=factor(decade), y=tongue.body.height)) +
geom_point(x=1,y=1, aes(colour="Female")) + # this is a hack for getting the legend right
geom_point(x=1,y=1, aes(colour="Male")) + # this is a hack for getting the legend right
scale_color_manual(name="Gender", values=c("deepskyblue4","orange"), breaks=c("Female","Male")) +
geom_dotplot(binaxis="y", stackdir="center", binpositions="all",
fill=ifelse(R.vq.ordered$gender=="female", "deepskyblue4","orange"),
col=NA, dotsize=1.5) +
stat_summary(fun.y=mean, fun.ymin=mean, fun.ymax=mean, geom="crossbar", aes(group=decade), width=0.4, col="grey") +
scale_x_discrete(name="Decade of recording") +
scale_y_continuous(name="Tongue body height") +
guides(colour = guide_legend(override.aes = list(size = 4))) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("Changes in tongue body height over time")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# ggsave("graphs/tongue_body_height.pdf", width=6, height=4)
Same modelling strategy as above.
if (refit_please) {
# model without AR
system.time(
R.m.baseline.mod.no.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m)
)
# saveRDS(R.m.baseline.mod.no.AR, "models/R.m.baseline.mod.no.AR")
# extracting empirical autocorrelation values at lag 1
R.m.baseline.rho <- start_value_rho(R.m.baseline.mod.no.AR)
# full model with AR
system.time(
R.m.baseline.mod.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.baseline.rho)
)
# saveRDS(R.m.baseline.mod.AR, "models/R.m.baseline.mod.AR")
summary(R.m.baseline.mod.AR)
# nested model
system.time(
R.m.baseline.mod.AR.min.decade <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.baseline.rho)
)
# saveRDS(R.m.baseline.mod.AR.min.decade, "models/R.m.baseline.mod.AR.min.decade")
}
R.m.baseline.mod.AR <- readRDS("models/R.m.baseline.mod.AR")
R.m.baseline.mod.AR.min.decade <- readRDS("models/R.m.baseline.mod.AR.min.decade")
compareML(R.m.baseline.mod.AR, R.m.baseline.mod.AR.min.decade)
## R.m.baseline.mod.AR: f3 ~ stress + avg.f3 + s(measurement.no) + s(decade, k = 4) +
## s(duration) + s(preceding.front, k = 3) + s(measurement.no,
## by = stress) + s(decade, k = 4, by = stress) + ti(measurement.no,
## decade, k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.m.baseline.mod.AR.min.decade: f3 ~ stress + avg.f3 + s(measurement.no) + s(duration) + s(preceding.front,
## k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value
## 1 R.m.baseline.mod.AR.min.decade 25006.67 23
## 2 R.m.baseline.mod.AR 25001.75 33 4.919 10.000 0.455
## Sig.
## 1
## 2
##
## AIC difference: -0.04, model R.m.baseline.mod.AR has lower AIC.
## Warning in compareML(R.m.baseline.mod.AR, R.m.baseline.mod.AR.min.decade):
## AIC might not be reliable, as an AR1 model is included (rho1 = 0.848567,
## rho2 = 0.848567).
## Warning in compareML(R.m.baseline.mod.AR, R.m.baseline.mod.AR.min.decade): Only small difference in ML...
Creating the bottom panel of figure 7.
# extracting predictions, formatting decade
R.m.baseline.full.plot <- plot_smooth(R.m.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.m.baseline.mod.AR, view = "measurement.no",
## plot_all = "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): full.
## * avg.f3 : numeric predictor; set to the value(s): 2405.03714710253.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.122709575.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.m.baseline.schwa.plot <- plot_smooth(R.m.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.m.baseline.mod.AR, view = "measurement.no",
## plot_all = "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): schwa.
## * avg.f3 : numeric predictor; set to the value(s): 2405.03714710253.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.122709575.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.m.baseline.plot <- rbind(R.m.baseline.full.plot, R.m.baseline.schwa.plot)
R.m.baseline.plot$decade <- factor(R.m.baseline.plot$decade, levels=c("1970","1980","1990","2000"))
# create graph
ggplot(R.m.baseline.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(name="decade of\nrecording", values=c("orange","darkorange3","deepskyblue1","deepskyblue4")) +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in males (controlling for baseline F3)")
## Warning: Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_male_f3_change_control.pdf", width=8, height=4)
Same modelling strategy as above.
if (refit_please) {
# model without AR
system.time(
R.f.baseline.mod.no.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f)
)
# saveRDS(R.f.baseline.mod.no.AR, "models/R.f.baseline.mod.no.AR")
# extracting empirical autocorrelation value at lag 1
R.f.baseline.rho <- start_value_rho(R.f.baseline.mod.no.AR)
# full model with AR
system.time(
R.f.baseline.mod.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.baseline.rho)
)
# saveRDS(R.f.baseline.mod.AR, "models/R.f.baseline.mod.AR")
summary(R.f.baseline.mod.AR)
# nested model
system.time(
R.f.baseline.mod.AR.min.decade <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.baseline.rho)
)
# saveRDS(R.f.baseline.mod.AR.min.decade, "models/R.f.baseline.mod.AR.min.decade")
}
R.f.baseline.mod.AR <- readRDS("models/R.f.baseline.mod.AR")
R.f.baseline.mod.AR.min.decade <- readRDS("models/R.f.baseline.mod.AR.min.decade")
compareML(R.f.baseline.mod.AR, R.f.baseline.mod.AR.min.decade)
## R.f.baseline.mod.AR: f3 ~ stress + avg.f3 + s(measurement.no) + s(decade, k = 4) +
## s(duration) + s(preceding.front, k = 3) + s(measurement.no,
## by = stress) + s(decade, k = 4, by = stress) + ti(measurement.no,
## decade, k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
## preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
## k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
## bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
## m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
## k = 4)
##
## R.f.baseline.mod.AR.min.decade: f3 ~ stress + avg.f3 + s(measurement.no) + s(duration) + s(preceding.front,
## k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
## duration) + ti(measurement.no, preceding.front, k = c(10,
## 3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
## k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
## s(measurement.no, word, bs = "fs", m = 1, k = 4)
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value
## 1 R.f.baseline.mod.AR.min.decade 25884.98 23
## 2 R.f.baseline.mod.AR 25874.10 33 10.876 10.000 0.016
## Sig.
## 1
## 2 *
##
## AIC difference: -7.88, model R.f.baseline.mod.AR has lower AIC.
## Warning in compareML(R.f.baseline.mod.AR, R.f.baseline.mod.AR.min.decade):
## AIC might not be reliable, as an AR1 model is included (rho1 = 0.850645,
## rho2 = 0.850645).
Creating the top panel of figure 7.
R.f.baseline.full.plot <- plot_smooth(R.f.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.f.baseline.mod.AR, view = "measurement.no",
## plot_all = "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): full.
## * avg.f3 : numeric predictor; set to the value(s): 2717.01597051597.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.103923377.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.f.baseline.schwa.plot <- plot_smooth(R.f.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
## Warning in plot_smooth(R.f.baseline.mod.AR, view = "measurement.no",
## plot_all = "decade", : Predictor decade is not a factor.
## Summary:
## * stress : factor; set to the value(s): schwa.
## * avg.f3 : numeric predictor; set to the value(s): 2717.01597051597.
## * measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
## * decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
## * duration : numeric predictor; set to the value(s): 0.103923377.
## * preceding.front : numeric predictor; set to the value(s): 1.
## * foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
## * speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
## * word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
##
R.f.baseline.plot <- rbind(R.f.baseline.full.plot, R.f.baseline.schwa.plot)
R.f.baseline.plot$decade <- factor(R.f.baseline.plot$decade, levels=c("1970","1980","1990","2000"))
# creating graph
ggplot(R.f.baseline.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(name="decade of\nrecording", values=c("orange","darkorange3","deepskyblue1","deepskyblue4")) +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in females (controlling for baseline F3)")
## Warning: Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_female_f3_change_control.pdf", width=8, height=4)